The benchmarks in the present analysis rely on our own previous analysis and annotation of papers, as well as on open resources such as Papers With Code, including data from several repositories (e.g., EFF, NLP-progress, SQuAD, RedditSota, etc.).
We focus on “interest” rather than “progress” for AI benchmarks as this is something we can compute using some proxies.
For this analysis we use the normalised hits obtained on AItopics per benchmark over the last decade (2008-2019). Note that the results from 2019 are incomplete.
#' Build the benchmark x cognitive-ability interest matrices.
#'
#' @param data Data frame with one row per benchmark. The code reads the
#'   columns `keyword`, `TaskHierarchies`, one contiguous `X<year>` column per
#'   entry of `years`, and the cognitive-ability columns listed in `cogAbs`.
#' @param rangeMean The mean interest is computed over the last
#'   `rangeMean + 1` entries of `years` (so `rangeMean = 1` averages the last
#'   two years, and `length(years) - 1` averages all of them).
#' @param norm If `TRUE`, rows are weighted by the normalised mean interest;
#'   otherwise by the raw mean interest.
#' @param years Years whose `X<year>` columns hold the yearly interest values.
#' @return An unnamed list: `[[1]]` the ability matrix weighted row-wise by
#'   interest, `[[2]]` the unweighted ability matrix, `[[3]]` the mean interest
#'   per benchmark, `[[4]]` the weights actually applied (normalised when
#'   `norm` is `TRUE`, otherwise identical to `[[3]]`).
prepareVis <- function(data, rangeMean = 5, norm = TRUE, years = 2008:2019) {
  set.seed(288)
  stopifnot(
    length(years) > 0,
    length(rangeMean) == 1,
    rangeMean >= 0,
    rangeMean < length(years)
  )

  interest.df <- data

  # Rename the yearly columns ("X2008", ...) to plain year labels. The labels
  # come from `years` so that a non-default `years` argument works too.
  ini <- which(colnames(interest.df) == paste0("X", years[1]))
  fin <- which(colnames(interest.df) == paste0("X", years[length(years)]))
  if (length(ini) != 1 || length(fin) != 1 ||
      fin - ini + 1 != length(years)) {
    stop(
      "`data` must contain one contiguous X<year> column per entry of `years`",
      call. = FALSE
    )
  }
  colnames(interest.df)[ini:fin] <- as.character(years)

  # Averaging window: the last `rangeMean + 1` years. tail() is used instead
  # of dropping `1:(n - rangeMean - 1)`, because that sequence is `1:0` when
  # the window spans every year and would wrongly discard the first one.
  years.range <- tail(years, rangeMean + 1)

  interest.df <- colwise(type.convert)(interest.df)
  year.cols <- as.character(years.range)
  interest.df$mean.Interest <-
    rowSums(interest.df[, year.cols, drop = FALSE]) / (rangeMean + 1)

  # Category = text before the first ">" of the task hierarchy, trimmed.
  hierarchy <- str_split(as.character(interest.df$TaskHierarchies), pattern = ">")
  interest.df$category <- str_trim(
    vapply(hierarchy, function(parts) parts[1], character(1))
  )

  cogAbs <- c("MP", "SI", "VP", "AP", "AS", "PA", "CE", "CO", "EC", "NV", "CL", "QL", "MS", "MC")
  interest <- select(interest.df, one_of(c("keyword", "category", cogAbs, "mean.Interest")))
  keywords <- interest$keyword
  rownames(interest) <- keywords
  # Drop the two leading identifier columns (keyword, category).
  interest <- interest[, -(1:2)]

  # Hard-coded data correction inherited from the original analysis (marked
  # there as "check it!"). Guarded so that a data set without this benchmark
  # does not get an extra, mostly-NA row appended.
  if ("ILSVRC" %in% rownames(interest)) {
    interest["ILSVRC", "QL"] <- 0
  }

  # colwise() rebuilds the data frame, so the row names are restored afterwards.
  interest <- colwise(type.convert)(interest)
  rownames(interest) <- keywords

  # Replace exact zeros with a tiny positive value (callers take logs).
  interest[interest$mean.Interest == 0, "mean.Interest"] <- 0.0000000000001
  interest.mean <- interest$mean.Interest
  interest <- select(interest, -mean.Interest)

  if (norm) {
    # NOTE(review): with method = "scale" the `range` argument is presumably
    # ignored (assuming this is BBmisc::normalize); confirm whether min-max
    # scaling to [0, 1] was intended here.
    interest.weights <- normalize(interest.mean + 0.000001, method = "scale", range = c(0, 1))
  } else {
    interest.weights <- interest.mean
  }

  # Data frame times a vector of length nrow: every column is multiplied
  # element-wise, i.e. each benchmark row is scaled by its own weight.
  interest.pond <- interest * interest.weights
  list(interest.pond, interest, interest.mean, interest.weights)
}
#' Draw the benchmark / cognitive-ability network as an interactive widget.
#'
#' @param data Incidence matrix (or data frame) with benchmarks in rows and
#'   cognitive abilities in columns, e.g. the first element returned by
#'   `prepareVis()`.
#' @param categories Category label of each benchmark row, in row order.
#' @param norm Currently unused; kept so existing calls remain valid.
#' @return The visNetwork object built by the pipeline below.
plotVis <- function(data, categories, norm = TRUE) {
  set.seed(288)

  # Palette indexed by the position of each category level.
  colours <- c(
    "1" = "black", "2" = "#543005", "3" = "#8c510a", "4" = "#bf812d",
    "5" = "#dfc27d", "6" = "#f6e8c3", "7" = "#f5f5f5", "8" = "#c7eae5",
    "9" = "#80cdc1", "10" = "#35978f", "11" = "#01665e", "12" = "#003c30",
    "13" = "#FAFAFA"
  )

  vis <- toVisNetworkData(
    graph_from_incidence_matrix(data, directed = FALSE, weighted = TRUE)
  )

  # The node table is filled as "benchmark rows first, ability columns last";
  # the ability count is taken from `data` instead of being fixed at 14.
  n.abilities <- ncol(data)
  n.benchmarks <- nrow(vis$nodes) - n.abilities

  # Benchmarks get a constant size; abilities are sized by their column total.
  vis$nodes$value <- c(rep(10, n.benchmarks), colSums(data) * 10000)
  vis$nodes$title <- vis$nodes$label
  vis$nodes$category <- c(categories, rep("CogAb", n.abilities))
  vis$nodes$group <- vis$nodes$category
  vis$nodes$color <- colours[as.numeric(as.factor(vis$nodes$category))]

  # Edge value: log of the range-normalised weight; the small offsets keep
  # the argument of log() strictly positive.
  vis$edges$value <- log(normalize(vis$edges$weight + 0.00001, method = "range", range = c(0, 1)) + 0.00001)

  v <- visNetwork(vis$nodes, vis$edges, height = "1000px", width = "100%") %>%
    visEdges(arrows = "to", color = list(color = 'rgba(70,130,180,0.3)', highlight = "#4682B4")) %>%
    visIgraphLayout(
      physics = FALSE,
      randomSeed = 2017,
      layout = "layout_with_fr"
    ) %>%
    visInteraction(navigationButtons = TRUE) %>%
    visOptions(selectedBy = "group", highlightNearest = TRUE)
  return(v)
}
Red: Last decade (2008-2019)
Green: Last lustrum (2014-2019)
Blue: Last Year (2019)
# Mean interest per benchmark over three trailing windows. Element [[3]] of
# prepareVis()'s result is the mean-interest vector; element [[1]] carries the
# benchmark names as row names.
interest.df <- read.xlsx2("interest_kw_processed_raw_slope.xlsx", sheetIndex = 1)

# The full-range result supplies both the names and the values, so it is
# computed once instead of twice.
interest.decade <- prepareVis(interest.df, length(years) - 1, norm = FALSE)

# NOTE(review): prepareVis() averages `rangeMean + 1` years, so the value 1
# used for Last.Year covers the last two years - confirm this is intended.
df.interest <- data.frame(
  Benchmark = rownames(interest.decade[[1]]),
  Last.Decade = interest.decade[[3]],
  Last.Lustrum = prepareVis(interest.df, 5, norm = FALSE)[[3]],
  Last.Year = prepareVis(interest.df, 1, norm = FALSE)[[3]]
)
df.interest.m <- melt(df.interest, id.vars = "Benchmark")

# xlab()/ylab() label the aesthetics, not the screen axes: with coord_flip()
# the x aesthetic (Benchmark) is drawn vertically, so the interest label
# belongs to ylab().
a <- ggplot(df.interest.m, aes(reorder(Benchmark, value), value, colour = variable)) +
  geom_point(alpha = 1/3, size = 3.5) + xlab("") + ylab("Mean Interest") +
  coord_flip() + theme_minimal() + theme(legend.position = "bottom")
b <- ggplot(df.interest.m, aes(reorder(Benchmark, value), log(value), colour = variable)) +
  geom_point(alpha = 1/3, size = 3.5) + xlab("") + ylab("log(Mean Interest)") +
  coord_flip() + theme_minimal() + theme(legend.position = "bottom")
a
b
Note that we can perform exactly the same analysis focusing on different (ranges of) years and obtaining the same graph but the width of the edges may vary (a little bit).
(Networks are interactive!)
# Top-level category of every benchmark: the text before the first ">" in
# TaskHierarchies, trimmed. Vectorised so it also works on an empty table.
interest.df$category <- str_trim(
  vapply(
    str_split(as.character(interest.df$TaskHierarchies), pattern = ">"),
    function(parts) parts[1],
    character(1)
  )
)
categories <- interest.df$category

# Network weighted by the mean interest over the whole range of years.
interest.pond.All <- prepareVis(interest.df, length(years) - 1, norm = FALSE)
plotVis(interest.pond.All[[1]], categories)
Almost unnoticeable differences regarding the width of the edges (due to the size of the graph and the small variations regarding mean interest).
# Same network, weighted by the mean interest of the window selected with
# rangeMean = 5.
interest.pond.5 <- prepareVis(interest.df, 5, norm = FALSE)
plotVis(interest.pond.5[[1]], categories)

# Column totals per cognitive ability. In prepareVis()'s result, [[1]] is the
# matrix weighted ("pondered") by mean interest and [[2]] is the unweighted
# one, so the titles are attached accordingly.
barplot(colSums(interest.pond.All[[1]]), main = "Total sum (pondered by mean interest)") # interest.pondered
barplot(colSums(interest.pond.All[[2]]), main = "Total sum") # interest.sumcols
barplot(colSums(interest.pond.5[[1]]), main = "Total sum (pondered by mean interest)") # interest.pondered
barplot(colSums(interest.pond.5[[2]]), main = "Total sum") # interest.sumcols
(Groups from https://paperswithcode.com/)
# One interest plot per task category. `plotIterest.Cat` and `interest.m` are
# not defined in this part of the file.
# NOTE(review): the calls are kept as separate top-level expressions,
# presumably so that each returned plot is auto-printed - confirm before
# folding them into a loop.
plotIterest.Cat(interest.m, "Computer Vision")
plotIterest.Cat(interest.m, "Graphs")
plotIterest.Cat(interest.m, "Natural Language Processing")
plotIterest.Cat(interest.m, "Playing Games")
plotIterest.Cat(interest.m, "Miscellaneous")
plotIterest.Cat(interest.m, "Medical")
plotIterest.Cat(interest.m, "Methodology")
plotIterest.Cat(interest.m, "Speech")
plotIterest.Cat(interest.m, "Reasoning")
plotIterest.Cat(interest.m, "Time Series")
plotIterest.Cat(interest.m, "Computer Code")